import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.api import VAR

# Load the tweet-level sentiment scores (the file is tab-separated despite
# its .csv extension) and split them by the leader each row is tagged with.
df = pd.read_csv('data/sentiment.csv', sep='\t', header=0)
scheer = df[df['politician_tag'] == 'scheer']
trudeau = df[df['politician_tag'] == 'trudeau']

#=======================================================================#
# Sentiment by Leader
#=======================================================================#
def _hourly_sentiment(tweets, prefix, interpolate_bot=False, window=18,
                      utc_offset_hours=5):
    """Build the hourly bot/human sentiment series for one leader.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Rows for a single leader with at least the columns ``date``,
        ``hour``, ``bot`` and ``sentiment``.  The ``bot`` column must take
        the values ``'bot'`` and ``'human'``, because those values become
        the column names after pivoting.
    prefix : str
        Prefix for the smoothed columns (``'<prefix>-bot'`` and
        ``'<prefix>-not'``).
    interpolate_bot : bool
        If true, fill gaps in the hourly bot series by linear interpolation
        before smoothing.
    window : int
        Number of hourly observations in the moving average.
    utc_offset_hours : int
        Hours subtracted from the timestamps to shift them from UTC.

    Returns
    -------
    pandas.DataFrame
        One row per hour with columns ``date_hr``, ``bot``, ``human``,
        ``'<prefix>-bot'`` and ``'<prefix>-not'``.
    """
    # Mean sentiment per (date, hour, user type).
    hourly = tweets.groupby(['date', 'hour', 'bot'])['sentiment'].mean().reset_index()
    hourly.columns = ['date', 'hour', 'bot', 'hrly_sent']

    # Combine date and hour into a single timestamp, then shift out of UTC.
    # NOTE(review): a fixed five-hour shift corresponds to Eastern *Standard*
    # Time; the Sept-Oct 2019 dates marked in Figure 1 fall in daylight time
    # (UTC-4). Left unchanged to keep published results reproducible --
    # confirm whether this is intended.
    stamp = hourly['date'].astype(str) + '_' + hourly['hour'].astype(str)
    hourly['date_hr'] = (pd.to_datetime(stamp, format='%Y-%m-%d_%H')
                         - pd.Timedelta(hours=utc_offset_hours))

    # One row per hour, one column per user type.
    wide = hourly.pivot(index='date_hr', columns='bot', values='hrly_sent').reset_index()

    if interpolate_bot:
        # Linear interpolation over hours with no bot observations.
        wide['bot'] = wide['bot'].interpolate()

    # Moving averages used for plotting.
    wide[prefix + '-bot'] = wide['bot'].rolling(window=window).mean()
    wide[prefix + '-not'] = wide['human'].rolling(window=window).mean()
    return wide


# A. Scheer series
# NOTE(review): only the Scheer bot series is interpolated; the human series
# and both Trudeau series are used as-is. Presumably only this series had
# gaps -- confirm against the data.
sst = _hourly_sentiment(scheer, 's', interpolate_bot=True)

# J. Trudeau series
tst = _hourly_sentiment(trudeau, 't')

#=======================================================================#
#
# Figure 1. Hourly Leader Sentiment on Twitter, by User Type
#
#=======================================================================#

# Draw the smoothed hourly sentiment series for both leaders on one axis,
# mark campaign events, and save the result as figures/figure1.jpeg.
ax = plt.gca()

# Series are drawn in this order: thin lines (lw=1) for bots first, then
# thick lines (lw=3) for humans; blue for Scheer, red for Trudeau.
# Alpha must lie in [0, 1]: Matplotlib's Artist.set_alpha raises ValueError
# for larger values, so every alpha here is 1 (fully opaque).
for frame, column, colour, width in [
        (sst, 's-bot', '#027fdc', 1),
        (tst, 't-bot', '#f76f73', 1),
        (sst, 's-not', '#027fdc', 3),
        (tst, 't-not', '#f76f73', 3)]:
    frame.plot(kind='line', x='date_hr', y=column, color=colour, alpha=1,
               ax=ax, figsize=(30, 8), lw=width)

# Grey bands: multi-day campaign events.
for start, end in [
        ('2019-09-05', '2019-09-06'),   # trudeau not coming to debate 1
        ('2019-09-18', '2019-09-20'),   # brownface photo releases
        ('2019-10-02', '2019-10-03'),   # 2 planes press release
        ('2019-10-16', '2019-10-17')]:  # obama endorsement
    ax.axvspan(start, end, alpha=1, color='lightgray')

# Black markers: debate days. A zero-width span with lw=3 renders as a
# vertical line at that date.
for debate_day in ['2019-09-12', '2019-10-02', '2019-10-07', '2019-10-10']:
    ax.axvspan(debate_day, debate_day, alpha=1, color='black', lw=3)

# Not referenced elsewhere in the visible code; kept in case later code uses it.
props = dict(boxstyle='round', facecolor='lightgray', alpha=0.5)

# Proxy artists so the legend shows one entry per (leader, user type) pair,
# in the same order as the labels passed to plt.legend below.
custom_lines = [Line2D([0], [0], color='#f76f73', lw=3),
                Line2D([0], [0], color='#f76f73', lw=1),
                Line2D([0], [0], color='#027fdc', lw=3),
                Line2D([0], [0], color='#027fdc', lw=1)]

plt.xlabel("")
plt.ylabel("Sentiment", fontsize=20)
plt.tick_params(labelsize=18)
plt.grid(linestyle='-', linewidth=0.5, axis='both', which='both')
plt.legend(handles=custom_lines,
           labels=['Trudeau (Humans)', 'Trudeau (Bots)', 'Scheer (Humans)', 'Scheer (Bots)'],
           fontsize=16, loc='upper left')
plt.savefig('figures/figure1.jpeg', dpi=200, bbox_inches='tight')

#=======================================================================#
#
# Table A2: Granger Causality Tests
#
#=======================================================================#

# Granger causality between the hourly bot and human sentiment series, per
# leader and in both directions, at the AIC-selected VAR lag and at 12 and
# 24 lags. The results are written to tables/tableA2.txt.
d1 = tst[['bot', 'human']]
d2 = sst[['bot', 'human']]

# Largest lag considered, both for VAR order selection and the Granger tests.
max_lag = 25

res = []
# The loop variable is deliberately not called `df`, so the raw data frame
# loaded at the top of the script is not overwritten.
for series_df, name in [(d1, 'Trudeau'), (d2, 'Scheer')]:
    # Let AIC choose the VAR order for this leader.
    model = VAR(series_df[['bot', 'human']])
    results = model.fit(maxlags=max_lag, ic='aic')
    lag_auto = results.k_ar
    # NOTE(review): if AIC selects order 0, the lookup `tests[lag]` below
    # raises KeyError, because Granger results start at lag 1 -- confirm
    # this cannot occur with the data.
    lags = [lag_auto, 12, 24]

    # grangercausalitytests asks whether the SECOND column Granger-causes
    # the FIRST, so the column order determines the direction.
    for columns, direction in [(['bot', 'human'], 'Human -> Bot'),
                               (['human', 'bot'], 'Bot -> Human')]:
        # One call yields results for every lag 1..max_lag; run it once per
        # direction rather than once per reported lag.
        tests = grangercausalitytests(series_df[columns].values,
                                      maxlag=max_lag, verbose=False)
        for lag in lags:
            # ssr_chi2test is (statistic, p-value, degrees of freedom).
            chi2, p_value, dof = tests[lag][0]['ssr_chi2test']
            res.append((name, direction, dof, '%0.3f' % chi2, '%0.3f' % p_value))

with open('tables/tableA2.txt', 'w') as fout:
    table = pd.DataFrame(res, columns=['series', 'direction', 'lags', 'chi2', 'p-value'])
    print(table.to_string(index=False), file=fout)

